## Setup ----

if(!require("pacman")) install.packages("pacman"); library("pacman")
pacman::p_load('tidybayes', 'egg','see','simr','RColorBrewer','emmeans','lme4', 'tidyverse', 'rstudioapi', 'rstanarm', 'bayestestR', 'BayesFactor', 'bayesplot', 'XNomial')

current_path <- getActiveDocumentContext()$path    ## gets path for current R script
setwd(dirname(current_path))                       ## sets dir to R script 

## Define functions ----

prop.less.than.5 <- function(x){length(x[x<.5]) / length(x)}        # Checks how much of a vector is less than 0
prop.more.than.33 <- function(x){length(x[x > 1/3]) / length(x)}   # Checks how much of a vector is more than 1/3
prop.more.than.66 <- function(x){length(x[x > 2/3]) / length(x)}   # Checks how much of a vector is more than 2/3

## Descriptive stats for each study ----

# Study 1
sample.stats.study.1 <- read.csv("poe11.csv") %>%
  as_tibble() %>%
  filter(Coins.exclusion == 0) %>% # Note: 2 excluded as fuss outs / failing training
  separate(age.years.months.days, into = c("years.of.age", "months.of.age", "days.of.age")) %>%
  mutate(approx.age.years = ((as.numeric(years.of.age) * 365.25) + (as.numeric(months.of.age) * 29.53) + as.numeric(days.of.age)) / 365.25) 

nrow(sample.stats.study.1)
mean(sample.stats.study.1$approx.age.years)
range(sample.stats.study.1$approx.age.years)
table(sample.stats.study.1$gender)

# Study 2

sample.stats.study.2 <- read.csv("poe12.csv") %>%
  as_tibble() %>%
  filter(exclude.coins != "Y" & dob != "") %>% # Eliminate exclusions and some junk rows from the downloaded .csv. 8 fuss outs / training failers (we're not reporting kids who were too old or didn't speak English)
  separate(age.years.months.days, into = c("years.of.age", "months.of.age", "days.of.age")) %>%
  mutate(approx.age.years = ((as.numeric(years.of.age) * 365.25) + (as.numeric(months.of.age) * 29.53) + as.numeric(days.of.age)) / 365.25) 

nrow(sample.stats.study.2) 
mean(sample.stats.study.2$approx.age.years)
range(sample.stats.study.2$approx.age.years)
table(sample.stats.study.2$gender)

# Study 3

sample.stats.study.3 <- read.csv("poe13.csv") %>%
  as_tibble() %>%
  filter(excluded == "no") %>% # 9 fuss outs / training failers
  separate(age.years.months.days, into = c("years.of.age", "months.of.age", "days.of.age")) %>%
  mutate(approx.age.years = ((as.numeric(years.of.age) * 365.25) + (as.numeric(months.of.age) * 29.53) + as.numeric(days.of.age)) / 365.25) 

nrow(sample.stats.study.3)
mean(sample.stats.study.3$approx.age.years)
range(sample.stats.study.3$approx.age.years)
table(sample.stats.study.3$gender)

all.ages <- c(sample.stats.study.1$approx.age.years, sample.stats.study.2$approx.age.years, sample.stats.study.3$approx.age.years) 
length(all.ages) # 72 kids total
mean(all.ages) # mean age 3.57
summary(all.ages) # Min age 3.01 max age 3.96
## Load all shaped data into a single data frame

## Wrangle data into long format ----

# Put Study 1 data into long format
study.1.pick <- read.csv("poe11.csv") %>%
  filter(Coins.exclusion == 0) %>%
  select(id = id.num, test.1.choice:test.4.choice) %>%
  pivot_longer(cols = test.1.choice:test.4.choice, names_to = "trial", values_to = "answer") %>%
  mutate(response = ifelse(answer == "certain", 1, 0),
         study = "Study 1") %>%
  select(id, study, response) %>%
  mutate(id = id + 2000) %>%
  mutate(measure = "Pick.from.3") 

# Put study 2 data into long format

study.2.throw <- read.csv("poe12.csv") %>% # Load and shape Matt's data
  filter(exclude.coins == "") %>%
  select(id.num, test.1.choice:test.8.choice, -starts_with("coins")) %>%
  pivot_longer(-id.num, names_to = "trial", values_to = "answer") %>%
  mutate(id = id.num,
         response = ifelse(answer == "certain", 0, 1), # Currently coded for "Choosing wisely"; switch values to code for "Choosing singleton"
         study = "Study 2") %>%
  select(id, study, response) %>%
  mutate(measure = "Pick.from.3") %>%
  mutate(measure = "Study2.throw") 

# Put study 3 DV1 data into long format
study.3.throw <- read.csv("poe13.csv")  %>% # Load and shape Stephanie's data
  filter(excluded == "no") %>%
  select(id, ends_with("throw"), -starts_with("p")) %>%
  pivot_longer(-id, names_to = "trial", values_to = "answer") %>%
  mutate(response = ifelse(answer == "certain", 0, 1), # Currently coded for "Choosing wisely"; switch values to code for "Choosing singleton"
         study = "Study 3") %>%
  select(id, study, response) %>%
  mutate(measure = "Study3.throw") 

# Put study 3 DV2 data into long format
study.3.pick <- read.csv("poe13.csv")  %>% # Load and shape Stephanie's data
  filter(excluded == "no") %>%
  select(id, ends_with("throw"), ends_with("open"), -starts_with("p")) %>%
  pivot_longer(-id, names_to = c("trial", ".value"), names_pattern = "(.[0-9]).(.+)") %>%
  filter(throw != "certain") %>%
  mutate(response = ifelse(open == "certain", 1, 0),
         study = "Study 3") %>%
  select(id, study, response) %>%
  mutate(measure = "Pick.from.2") 

# Put all the data into one df
chests.df <- study.1.pick %>%
  rbind(study.2.throw, study.3.throw, study.3.pick) %>%
  mutate(measure = factor(measure, levels = c("Pick.from.3", "Study2.throw", "Study3.throw", "Pick.from.2")))

## Fit model ----

# set.seed(804648)
# all.comps.model <- stan_glmer(response ~ measure + (1|id), data = chests.df, family = binomial, iter = 3500, warmup = 1000)
# saveRDS(all.comps.model, "all.comps.model.rds")
all.comps.model <- readRDS("all.comps.model.rds")

# Graphical assessment of model fit
set.seed(953)
ppc_dens_overlay(y = all.comps.model$y, yrep = posterior_predict(all.comps.model, draws = 100))

# Use emmeans to identify parameters of interest
all.comps.model.emm <- emmeans(all.comps.model, specs = "measure", type = "response")
# Use bayestestR to extract desired posteriors
all.comps.model.post <- insight::get_parameters(all.comps.model.emm)
# Put posteriors into long format and wrangle/clean
all.comps.model.post.long <- all.comps.model.post %>%
  pivot_longer(everything(), names_to = "measure", values_to = "sample") %>%
  mutate(measure = factor(measure, levels = c("Pick.from.3", "Study2.throw", "Study3.throw", "Pick.from.2")),
    study = factor(ifelse(grepl("from.3", measure), "Study 1",
                        ifelse(grepl("Study2", measure), "Study 2", "Study 3"))))

## Analyze results from model ----

# Wrangle model posterior for plotting
all.comps.model.plot <- as.data.frame(all.comps.model.emm) %>%
  mutate(measure = factor(measure, levels = c("Pick.from.3", "Study2.throw", "Study3.throw", "Pick.from.2")),
         study = factor(ifelse(grepl("from.3", measure), "Study 1",
                               ifelse(grepl("Study2", measure), "Study 2", "Study 3"))))

# Calculate proportions by kid
props.wise.by.kid <- chests.df %>%
  group_by(id, study, measure) %>%
  summarize(prop.wise = mean(response)) %>%
  mutate(study = factor(ifelse(grepl("from.3", measure), "Study 1",
                               ifelse(grepl("Study2", measure), "Study 2", "Study 3"))))

ggplot(all.comps.model.plot, aes(x = measure, y = prob)) +
  facet_grid(.~ study, scales = "free_x", space = "free_x") +
  geom_violinhalf(data = all.comps.model.post.long, aes(x = measure, y = sample), color = "black", fill = "darkgrey", alpha = .35) +
  geom_dotplot(data = props.wise.by.kid, aes(x = measure, y = prop.wise), binaxis = "y",
               position = position_nudge(x = -0.15), dotsize = .45, color = "white", fill = "black") +
  geom_errorbar(aes(ymin = lower.HPD, ymax = upper.HPD), size = .6, width = .2, color = "red", alpha = .85) +
  geom_point(shape = 23, size = 5, color = "red", fill = "white", stroke = 1.25) +
  ylim(0,1) +
  theme_bw() +
  theme(text = element_text(size=18), axis.ticks.y=element_blank(), axis.text.x = element_blank(),
        panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), plot.background = element_blank(), 
        panel.background = element_blank(), legend.title = element_blank(), strip.background = element_blank()) +
  labs(y = "Probability of choosing wisely", x = "")

# A modified version, for increased clarity?
ggplot(all.comps.model.plot, aes(x = measure, y = prob)) +
  facet_grid(.~ study, scales = "free_x", space = "free_x") +
  geom_violinhalf(data = all.comps.model.post.long, aes(x = measure, y = sample), color = "black", fill = "darkgrey", alpha = .35) +
  geom_dotplot(data = props.wise.by.kid, aes(x = measure, y = prop.wise), binaxis = "y", stackratio = 1.25, stackdir = "center",
               position = position_nudge(x = -0.0), dotsize = .45, color = "white", fill = "black") +
  geom_errorbar(aes(ymin = lower.HPD, ymax = upper.HPD), size = .6, width = .2, color = "red", alpha = .85) +
  geom_point(shape = 23, size = 5, color = "red", fill = "white", stroke = 1.25) +
  ylim(0,1) +
  theme_bw() +
  theme(text = element_text(size=18), axis.ticks.y=element_blank(), axis.text.x = element_blank(),
        panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), plot.background = element_blank(), 
        panel.background = element_blank(), legend.title = element_blank(), strip.background = element_blank()) +
  labs(y = "Probability of choosing wisely", x = "")

# Nearly the same plot, but with scale numbers at the bottom:
ggplot(all.comps.model.post.long) +
  facet_wrap(~measure, nrow = 1) +
  geom_density(aes(y = sample), fill = "lightgrey", orientation = "y") +
  geom_dotplot(data = props.wise.by.kid, aes(x = -1, y = prop.wise), binaxis = "y",
               position = position_nudge(x = -0.15), dotsize = .45, color = "white", fill = "black") +
  geom_point(data = all.comps.model.plot, aes(x = 0, y = prob),shape = 23, size = 5, color = "red", fill = "red") +
  geom_errorbar(data = all.comps.model.plot, aes(x = 0, ymin = lower.HPD, ymax = upper.HPD), size = .6, width = 1, color = "red", alpha = .85) +
  ylim(0,1) +
  theme_bw() +
  theme(text = element_text(size=18), panel.grid.minor = element_blank(), panel.grid.major.x = element_blank()) +
  labs(x = "", y = "Probability of choosing wisely")

# Plot the predictions
mod.pred <- tibble(prob = rep(1, 4), 
                   study = c("Study 1", "Study 2", rep("Study 3", 2)),
                   measure = c("Pick 1 of 3", rep("Throw Away", 2), "Pick 1 of 2") %>% 
                     factor(levels = c("Pick 1 of 3", "Throw Away", "Pick 1 of 2")))
minimal.pred <- tibble(prob = c(.5, 1, 1, .5), 
                       study = c("Study 1", "Study 2", rep("Study 3", 2)),
                       measure = c("Pick 1 of 3", rep("Throw Away", 2), "Pick 1 of 2") %>% 
                         factor(levels = c("Pick 1 of 3", "Throw Away", "Pick 1 of 2")))
low.level.pred <- tibble(prob = rep(.5, 4), 
                         study = c("Study 1", "Study 2", rep("Study 3", 2)),
                         measure = c("Pick 1 of 3", rep("Throw Away", 2), "Pick 1 of 2") %>% 
                           factor(levels = c("Pick 1 of 3", "Throw Away", "Pick 1 of 2")))

all.pred <- minimal.pred %>%
  mutate(theory = "Minimal possibility") %>%
  rbind(mod.pred %>%
          mutate(theory = "Possibility concepts"), low.level.pred %>%
          mutate(theory = "Low-level strategies")) %>%
  mutate(theory = factor(theory, levels = c("Minimal possibility", "Possibility concepts", "Low-level strategies")),
         prob = c(.53, .97, .97, .53, 1, 1.03, 1.03, 1, .47, .5, .5, .47))

ggplot(all.pred, aes(x = measure, y = prob, group = "theory")) +
  facet_grid(.~ study, scales = "free_x", space = "free_x") +
  geom_point(aes(color = theory, shape = theory), size = 5, fill = "red") +
  scale_shape_manual(values = c(23, 15, 19)) +
  scale_color_manual(values = c("red", "black", "gray")) +
  theme_bw() +
  theme(text = element_text(size=18), axis.ticks.y=element_blank(), axis.text.x = element_blank(), legend.position="none",
        panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), plot.background = element_blank(),
        panel.background = element_blank(), legend.title = element_blank(), strip.background = element_blank()) +
  ylim(0,1.04) +
  labs(y = "Probability of choosing wisely", x = "") 

summary(all.comps.model)
posterior_interval(all.comps.model)

# Find the proportion of each posterior that is greater than 1/3
all.comps.model.post.long %>%
  group_by(measure) %>%
  summarize(prop.above.33 = prop.more.than.33(sample))

# Find the proportion of each posterior that is greater than 2/3
all.comps.model.post.long %>%
  group_by(measure) %>%
  summarize(prop.above.66 = prop.more.than.66(sample))

# How many samples are above 2/3?
all.comps.model.post.long %>%
  group_by(measure) %>%
  filter(sample > .66) %>%
  summarize(n.above.66 = length(sample))
# How many samples are above 1/2?
all.comps.model.post.long %>%
  group_by(measure) %>%
  filter(sample > .5) %>%
  summarize(n.above.5 = length(sample))

# I want to see the median for each 
all.comps.model.post.long %>%
  group_by(measure) %>%
  summarize(median = median(sample))

# Let's get that in log-odds too
emmeans(all.comps.model, specs = "measure") %>%
  insight::get_parameters() %>%
  sapply(median) 

# And let's get the quantiles of each posterior too
all.comps.model.post.long %>%
  group_by(measure) %>%
  summarize(quantile(sample))

# And let's have a ROPE around .5, = [.49, .51]

all.comps.model.post.long %>%
  group_by(measure) %>%
  summarize(rope(sample, ci = 1, range = c(.49, .51)))

# For comparison, let's look at the ROPE around the median, .51, = [.50, .52]
all.comps.model.post.long %>%
  group_by(measure) %>%
  summarize(rope(sample, ci = 1, range = c(.50, .52)))

# Now I want to find the proportion of hypotheses that are better than the hypothesis that they are choosing at chance.

# Make an object of all the samples of the posterior probability of choosing the target for 2.5-year-olds in 3-options tasks
pick.from.2.post <- all.comps.model.post.long %>%
  filter(measure == "Pick.from.2") %>%
  select(sample) 

# This function takes the object made at the line above and calculates the proportion that is between lb and ub (i.e., lower bound and upper bound)
rope.calculator.pick.from.2 <- function(lb, ub){
  length(pick.from.2.post$sample[pick.from.2.post$sample > lb & pick.from.2.post$sample < ub]) / nrow(pick.from.2.post)
}

# Set up a grid of hypotheses, so that we can evaluate the probability of each hypothesis
eval.rope.pick.from.2 <- tibble(grid = seq(.01, .99, length.out = 1000), 
                                lower.rope.bound = grid - .01,
                                upper.rope.bound = grid + .01,
                                rope.prob = NA) 

for(i in 1:nrow(eval.rope.pick.from.2)){
  eval.rope.pick.from.2$rope.prob[i] <- rope.calculator.pick.from.2(eval.rope.pick.from.2$lower.rope.bound[i], eval.rope.pick.from.2$upper.rope.bound[i])
}

rope.calculator.pick.from.2(.49, .51)# Here's the probability that they're choosing 50/50: .1476
rope.calculator.pick.from.2(.50, .52) # Here's the probability of the ROPE around the median
quantile(eval.rope.pick.from.2$rope.prob, probs = seq(.99, 1, length.out = 5)) # Of all hypotheses, .1476 is more likely than 99.25% of hypotheses

# Can I plot this in a meaningful way?

ggplot(eval.rope.pick.from.2, aes(x = grid, y = rope.prob)) +
  geom_bar(stat = "identity") +
  ylim(0, .165)

# Now let's just look at the hypotheses within the 95% CI

eval.rope.pick.from.2.95 <- tibble(grid = seq(.41, .62, length.out = 1000), # These numbers are the upper and lower bound of the 95% ci, ci(pick.from.2.post, method = "HDI")
                                   lower.rope.bound = grid - .01,
                                   upper.rope.bound = grid + .01,
                                   rope.prob = NA) 

for(i in 1:nrow(eval.rope.pick.from.2.95)){
  eval.rope.pick.from.2.95$rope.prob[i] <- rope.calculator.pick.from.2(eval.rope.pick.from.2.95$lower.rope.bound[i], eval.rope.pick.from.2.95$upper.rope.bound[i])
}

quantile(eval.rope.pick.from.2$rope.prob, probs = seq(.99, 1, length.out = 5)) # Of all hypotheses, .148 in the top 1% of hypotheses

# I need the medians again

all.comps.model.post.long %>%
  group_by(measure) %>%
  summarize(median(sample))

# Have a look at the pairwise comparisons
pairs(emmeans(all.comps.model, specs = "measure"))
# Sometimes I'll want to reverse these
pairs(emmeans(all.comps.model, specs = "measure"), reverse = TRUE)

# Let's get the posteriors for the pairwise comparisons
all.comps.model.pairs <- insight::get_parameters(pairs(emmeans(all.comps.model, specs = "measure"))) %>% 
  select(1:3) 

(pairs.medians.log.odds.ratios <- sapply(all.comps.model.pairs, median)) # Expressed in log odds:    -1.5972674    -0.9765004   0.4259430 
(pairs.medians.odds.ratios <- exp(pairs.medians.log.odds.ratios))        # Expressed as odds ratios:  0.2024490     0.3766268   1.5310335

# Next let's get the 95% CIs
(pairs.CIs.log.odds.ratios <- sapply(all.comps.model.pairs, ci, method = "HDI"))            # Expressed as log odds
# 80% CIs
(pairs.80.CIs.log.odds.ratios <- sapply(all.comps.model.pairs, ci, ci = 0.80, method = "HDI")) # Expressed as log odds

# Additional contrasts
additional.comps.pairs <- insight::get_parameters(pairs(emmeans(all.comps.model, specs = "measure"))) %>% 
  select(4:6) 

additional.comps.pairs.long <- additional.comps.pairs %>%
  pivot_longer(everything(), names_to = "contrast", values_to = "sample")

additional.comps.pairs.long %>%
  group_by(contrast) %>%
  summarize(median(sample))

additional.comps.pairs.long %>%
  group_by(contrast) %>%
  summarize(ci(sample, method = "HDI"))

# We also need to know the probability that performance was better in S3Q2 than in S1

all.comps.model.pairs %>%
  as_tibble() %>%
  select(3) %>% 
  rename(contrast = 1) %>%
  filter(contrast < 0) %>%
  summarize(prob.s3q2.easier = n() / nrow(all.comps.model.pairs))

all.comps.model.pairs %>%
  as_tibble() %>%
  select(3) %>% 
  rename(contrast = 1) %>%
  filter(contrast < -0.1) %>%
  summarize(prob.s3q2.easier = n() / nrow(all.comps.model.pairs))

all.comps.model.pairs %>%
  as_tibble() %>%
  select(3) %>% 
  rename(contrast = 1) %>%
  filter(contrast < -0.52) %>% # Here this is calculated from not-reversed contrasts
  summarize(prob.s3q2.easier = n() / nrow(all.comps.model.pairs))

## Age effects in 2.5- and 3-year-olds by study, age as categorical ----

# Load and shape data

# Mody and Carey 3 cups data
mc.age.df <- read.csv("mc.data.csv") %>% # Load original Mody and Carey data in long format
  as_tibble() %>%
  filter(age.group == 2 | age.group == 3, !grepl("test", trial)) %>%
  mutate(study = "Mody & Carey 2016",
         age.group = factor(age.group, levels = c(2, 3), labels = c("2.5yos", "3yos"))) %>%
  select(id, response, study, age.group) 

# Grigoroglou et al. 3 cups data
gr.age.df <- read_csv("grigoroglou.csv") %>% # Load original Grigoroglou et al. data in long format
  as_tibble() %>%
  select(id = ID, Trial, AgeMonths, response = Acc) %>%
  filter(!is.na(response), (AgeMonths > 30 & AgeMonths < 48), Trial == "Training") %>%
  mutate(age.group = ifelse(AgeMonths < 36, "2.5yos", "3yos"),
         study = "Grigoroglou et al. 2019", id = as.numeric(id)) %>%
  select(id, response, study, age.group)
  
chest.3cups.age <- read.csv("poe11.csv") %>%
  filter(Coins.exclusion == 0) %>%
  select(id = id.num, test.1.choice:test.4.choice) %>%
  pivot_longer(cols = test.1.choice:test.4.choice, names_to = "trial", values_to = "answer") %>%
  mutate(response = ifelse(answer == "certain", 1, 0),
         study = "Study 1") %>%
  select(id, response, study) %>%
  mutate(id = id + 2000,
         age.group = "3yos",
         age.group = factor(age.group, levels = c("2.5yos", "3yos")))

all.age.data <- rbind(mc.age.df, gr.age.df, chest.3cups.age) %>%
  mutate(study = factor(study, levels = c("Mody & Carey 2016", "Grigoroglou et al. 2019", "Study 1"))) %>%
  filter(!is.na(response))

# Calculate descriptive proportions correct for each study and age group in the "Pick from 3" studies

all.age.data %>%
  group_by(study, age.group) %>%
  summarise(prop.single = mean(response)) 

# Calculate descriptive proportions correct for each participant in each "Pick from 3" study 

props.correct.age <- all.age.data %>%
  group_by(id, study, age.group) %>%
  summarise(prop.single = mean(response)) %>%
  mutate(measure = "Pick from 3")

# Model data
# set.seed(4802)
# age.mod <- stan_glmer(response ~ age.group * study + (1|id), family = "binomial", data = all.age.data, iter = 3500, warmup = 1000)
# saveRDS(age.mod, "age.mod.RDS")
age.mod <- readRDS("age.mod.RDS")

# "Point estimates" and CIs for each age group and study
emmeans(age.mod, specs = "age.group", by = "study", type = "response")
# Pairwise comparisons, on the log odds scale
pairs(emmeans(age.mod, specs = "study", by = "age.group"))
pairs(emmeans(age.mod, specs = "study", by = "age.group"), reverse = TRUE)

## Age effects in 2.5- and 3-year-olds, ignoring study due to rank deficiency (no 2.5-year-olds in Study 1) ----

# set.seed(4802)
# age.mod.no.study <- stan_glmer(response ~ age.group + (1|id), family = "binomial", data = all.age.data, iter = 3500, warmup = 1000)
# saveRDS(age.mod.no.study, "age.mod.no.study.RDS")
age.mod.no.study <- readRDS("age.mod.no.study.RDS")

emmeans(age.mod.no.study, specs = "age.group", type = "response")
pairs(emmeans(age.mod.no.study, specs = "age.group"), reverse = TRUE)

age.mod.no.study.plot <- as_tibble(emmeans(age.mod.no.study, specs = "age.group", type = "response"))
age.mod.no.study.post <- insight::get_parameters(emmeans(age.mod.no.study, specs = "age.group", type = "response")) %>%
  as_tibble() %>%
  pivot_longer(everything(), names_to = "age.group", values_to = "sample") %>%
  mutate(age.group = factor(age.group))

ggplot(age.mod.no.study.plot, aes(x = age.group, y = prob)) +
  geom_violinhalf(data = age.mod.no.study.post, aes(y = sample), fill = "darkgrey", alpha = .5) +
  geom_errorbar(aes(ymin = lower.HPD, ymax = upper.HPD), width = .05) +
  geom_point(shape = 23, fill = "white") +
  geom_dotplot(data = props.correct.age, aes(x = age.group, y = prop.single), binaxis = "y", stackdir = "down",
               position = position_nudge(x = -0.05), dotsize = .45, color = "white", fill = "black") +
  ylim(0,1) +
  theme_bw() +
  labs(y = "Probability of choosing singleton")

age.mod.no.study.post %>%
  group_by(age.group) %>%
  summarize(cred = ci(sample, ci = .95),
            prop.less.than.5 = prop.less.than.5(sample),
            rope.size = rope(sample, range = c(.49, .51)),
            median = median(sample),
            lower.median.rope = median - .01,
            upper.median.rope = median + .01,
            median.rope = rope(sample, range = c(lower.median.rope, upper.median.rope)))

# Make an object of all the samples from within the 95% CI of the posterior probability of choosing the target for 2.5-year-olds in 3-options tasks
age.mod.no.study.post.2.5.95.ci.only <- age.mod.no.study.post %>%
  filter(age.group == "2.5yos") %>%
  filter(sample > .376 & sample < .566)

# This function takes the object made at the line above and calculates the proportion that is between lb and ub (i.e., lower bound and upper bound)
rope.calculator.age.mod.2.5.95 <- function(lb, ub){
  length(age.mod.no.study.post.2.5.95.ci.only$sample[age.mod.no.study.post.2.5.95.ci.only$sample > lb & age.mod.no.study.post.2.5.95.ci.only$sample < ub]) / nrow(age.mod.no.study.post.2.5.95.ci.only)
}

# Set up a grid of hypotheses, so that we can evaluate the probability of each hypothesis
eval.rope.age.mod.no.study.2.5.95 <- tibble(grid = seq(.376, .566, length.out = 1000),
                                         lower.rope.bound = grid - .01,
                                         upper.rope.bound = grid + .01,
                                         rope.prob = NA) 

for(i in 1:nrow(eval.rope.age.mod.no.study.2.5.95)){
  eval.rope.age.mod.no.study.2.5.95$rope.prob[i] <- rope.calculator.age.mod.2.5.95(eval.rope.age.mod.no.study.2.5.95$lower.rope.bound[i], eval.rope.age.mod.no.study.2.5.95$upper.rope.bound[i])
}

rope.calculator.age.mod.2.5.95(.49, .51) # Probability over the 95% CI is .143
rope.calculator.age.mod.2.5.95(.461, .481) # Probability of the median over the 95% CI is .169
quantile(eval.rope.age.mod.no.study.2.5.95$rope.prob, probs = seq(.68, .70, length.out = 5)) # About 30% of hypotheses are better than the hypothesis that they are choosing at 50%

ggplot(eval.rope.age.mod.no.study.2.5.95, aes(x = grid, y = rope.prob)) +
  geom_bar(stat = "identity")

# Now I want to repeat this looking at the full hypothesis space

# Make an object of all the samples from within the 95% CI of the posterior probability of choosing the target for 2.5-year-olds in 3-options tasks
age.mod.no.study.post.2.5 <- age.mod.no.study.post %>%
  filter(age.group == "2.5yos")

# This function takes the object made at the line above and calculates the proportion that is between lb and ub (i.e., lower bound and upper bound)
rope.calculator.age.mod.2.5 <- function(lb, ub){
  length(age.mod.no.study.post.2.5$sample[age.mod.no.study.post.2.5$sample > lb & age.mod.no.study.post.2.5$sample < ub]) / nrow(age.mod.no.study.post.2.5)
}

# Set up a grid of hypotheses, so that we can evaluate the probability of each hypothesis
eval.rope.age.mod.no.study.2.5 <- tibble(grid = seq(.01, .99, length.out = 1000),
                                            lower.rope.bound = grid - .01,
                                            upper.rope.bound = grid + .01,
                                            rope.prob = NA) 

for(i in 1:nrow(eval.rope.age.mod.no.study.2.5)){
  eval.rope.age.mod.no.study.2.5$rope.prob[i] <- rope.calculator.age.mod.2.5(eval.rope.age.mod.no.study.2.5$lower.rope.bound[i], eval.rope.age.mod.no.study.2.5$upper.rope.bound[i])
}

rope.calculator.age.mod.2.5(.49, .51) #.1360 
rope.calculator.age.mod.2.5(.46, .48) #.1620

quantile(eval.rope.age.mod.no.study.2.5$rope.prob, probs = seq(.93, .95, length.out = 5)) # Better than 94% of hypotheses

ggplot(eval.rope.age.mod.no.study.2.5, aes(x = grid, y = rope.prob)) +
  geom_bar(stat = "identity")

emmeans(age.mod.no.study, specs = "age.group", type = "response") %>%
  insight::get_parameters() %>%
  pivot_longer(everything(), names_to = "age.group", values_to = "sample") %>%
  group_by(age.group) %>%
  summarize(prop.above.33 = prop.more.than.33(sample))

## Analysis of error distribution ----

# Stipulate observed distribution
observed.dist <- c(0, 5, 9, 5, 5)

# Calculate expected values under hyp. 1: 60/40 split guessers/modal representers

num.guessers.h1 <- 24 * .6
num.modal.h1 <- 24 * .4

# Calculate binomial density for guessers
guessers.density.h1 <- dbinom(seq(0,4), 4, 1/3) 

# Calculate expected value for guessers by multiplying by number of guessers
expected.guessers.h1 <- guessers.density.h1 * num.guessers.h1

# Stipulate biniomial density for modal representers
modal.density.h1 <- c(0, 0, 0, 0, 1)

# Calculate expected value for modal representers by multiplying by number of modal representers
expected.modal.h1 <- modal.density.h1 * num.modal.h1

# Calculate expected value for full sample by adding the two subsamples together
expected.h1 <- expected.guessers.h1 + expected.modal.h1

# Calculate expected values under hyp. 2: 80/20 split minimal/modal representers
num.minimal.h2 <- 24 * .8
num.modal.h2 <- 24 * .2

# Calculate binomial density for minimal representers
minimal.density.h2 <- dbinom(seq(0,4), 4, 1/2) 

# Calculate expected value for minimal representers by multiplying by number of minimal representers
expected.minimal.h2 <- minimal.density.h2 * num.minimal.h2

# Stipulate biniomial density for modal representers
modal.density.h2 <- c(0, 0, 0, 0, 1)

# Calculate expected value for modal representers by multiplying by number of modal representers
expected.modal.h2 <- modal.density.h2 * num.modal.h2

# Calculate expected value for full sample by adding the two subsamples together
expected.h2 <- expected.minimal.h2 + expected.modal.h2

# Multinomial tests

xmulti(observed.dist, 
       expected.h1,   # Expected distribution from a mix of guessers + modal (60/40)
       detail = 2)

xmulti(observed.dist, 
       expected.h2,   # Expected distribution from a mix of minimal + modal (80/20)
       detail = 2)

## 2.5-year-olds in the combined M+C, Gr. et al. data

# This is the same process we just went through above with the current data, but I've condensed it a bit
observed.2.5 <- c(3, 21, 15, 2)
num.minimal <- 41
num.guesser <- 41 * .795
num.modal <- 41 * .205

expected.all.minimal <- dbinom(seq(0,3), 3, 1/2) * num.minimal
expected.guesser <- dbinom(seq(0,3), 3, 1/3) * num.guesser
expected.modal <- dbinom(seq(0,3), 3, 1) * num.modal
expected.guesser.modal.2.5 <- expected.guesser + expected.modal

# Multinomial tests
xmulti(observed.2.5, 
       expected.guesser.modal.2.5, # Expected distribution from 100% 79.5% guessers, 20.5% modal
       detail = 2)

xmulti(observed.2.5, 
       dbinom(seq(0,3), 3, 1/2),   # Expected distribution from 100% minimal
       detail = 2)

# 3-year-olds in the combined M+C, Gr. et al. data
# For the 3-year-olds I've just plugged the relevant calculations directly into the multinomial test
xmulti(c(4, 14, 16, 12),
       (dbinom(seq(0,3), 3, 1/2) * .8) + (dbinom(seq(0,3), 3, 1) * .2),
       detail = 2)

xmulti(c(4, 14, 16, 12),
       (dbinom(seq(0,3), 3, 1/3) * .6) + (dbinom(seq(0,3), 3, 1) * .4),
       detail = 2)



